In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import plotly.express as px
import plotly.graph_objects as go
import datetime
from scipy import stats
import numpy as np
import statsmodels.api as sm 
from statsmodels.formula.api import ols
import warnings
# NOTE(review): this silences *every* warning for the whole notebook, including
# pandas warnings about chained assignment / deprecated behaviour.
warnings.filterwarnings('ignore')
# Folder prefix for the input CSV files; it is concatenated directly with a file
# name, hence the trailing separator. Because this is a raw string, each `\\` is
# two literal backslash characters — presumably tolerated by Windows path
# handling; TODO confirm. The location is machine-specific.
path = r'D:\\NUS\\Seat_study1\\output_data\\'
# Folder prefix for anthropometric data (not referenced by the cells visible here).
anthro_path = r'D:\\NUS\\Seat_study1\\anthro_data\\'
In [2]:
# Helper plotting functions
def scatter_ax(ax, x, y, xlabel, title, ylabel="Discomfort increase per hour (slope)"):
    """Scatter ``y`` against ``x`` on ``ax`` and overlay a simple OLS trend line.

    The regression ``y ~ const + x`` is fitted once; its summary table is shown
    with ``display`` (expected to be available in the notebook namespace) and
    its coefficients are used to draw the dashed fit line.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    x, y : array-like
        Predictor and response values; both must be convertible to float.
    xlabel : str
        Label for the x axis.
    title : str
        Title of the axes.
    ylabel : str, optional
        Label for the y axis. Defaults to the discomfort-slope label used
        throughout this notebook.

    Returns
    -------
    None
    """
    # Convert once so the fit and the line endpoints use the same numeric data.
    x_vals = np.asarray(x, float)
    y_vals = np.asarray(y, float)

    ax.scatter(x, y, alpha=0.6)

    # Fit a single time and reuse the result for both the summary and the line
    # (the model was previously fitted twice).
    fit = sm.OLS(y_vals, sm.add_constant(x_vals)).fit()
    display(fit.summary())
    intercept, slope = fit.params

    # Draw the fitted line across the observed range of x.
    xx = np.linspace(x_vals.min(), x_vals.max(), 100)
    yy = intercept + slope * xx
    ax.plot(xx, yy, "--", color="black", linewidth=1.2, alpha=0.8)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(True, alpha=0.3)
In [3]:
# Load the ratings table from the output-data folder.
df_ratings_all = pd.read_csv(path + 'df_ratings_all.csv')
# Rescale the stored slope by a factor of 1/10.
# NOTE(review): the reason for the factor (unit conversion?) is not visible here — confirm.
df_ratings_all['slope_hour'] = df_ratings_all['slope_hour'] / 10
# Peek at the first two rows.
df_ratings_all.head(2)
Out[3]:
rating1 rating2 rating3 rating4 rating5 rating6 rating7 rating8 rating9 rating10 ... intercept r-square rmse Significant ll ul slope_hour slope_direction max_rating slope_group
0 10.0 10.0 10.0 10.0 11.0 11.0 11.0 10.0 12.0 12.0 ... 8.752 0.75784 1.39106 True 0.0159 0.0281 0.132 P 20.0 Below_3.1hrs
1 10.0 10.0 10.0 10.0 10.0 10.0 12.0 10.0 12.0 13.0 ... 9.515 0.74473 0.97788 True 0.0107 0.0193 0.090 P 16.0 Below_3.1hrs

2 rows × 60 columns

In [4]:
# Summary statistics of the rescaled discomfort slope.
df_ratings_all['slope_hour'].describe()
Out[4]:
count    208.000000
mean       0.177577
std        0.207520
min       -0.372000
25%        0.024000
50%        0.102000
75%        0.282000
max        0.936000
Name: slope_hour, dtype: float64
In [5]:
# Check dtypes and non-null counts of the two columns used in the regressions below.
df_ratings_all.loc[:, ['slope_hour', 'Seat_angle']].info()
<class 'pandas.DataFrame'>
RangeIndex: 208 entries, 0 to 207
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   slope_hour  208 non-null    float64
 1   Seat_angle  208 non-null    str    
dtypes: float64(1), str(1)
memory usage: 3.4 KB
In [6]:
# Number of missing seat-angle entries.
df_ratings_all['Seat_angle'].isna().sum()
Out[6]:
0
In [7]:
# Textual placeholders that should count as "missing" in Seat_angle.
null_vals = ['', 'nan', 'N/A', None, 'na', 'None', 'none']
replaced_null = {item: np.nan for item in null_vals}
# Assign the result back to the column. Calling `.replace(..., inplace=True)` on
# `df_ratings_all['Seat_angle']` only modifies a temporary Series under pandas
# Copy-on-Write, so the DataFrame itself would be left unchanged.
df_ratings_all['Seat_angle'] = df_ratings_all['Seat_angle'].replace(replaced_null)
# Re-count missing values after normalising the placeholders.
df_ratings_all.Seat_angle.isna().sum()
Out[7]:
0

Fixed seat angles - 174 participants (seat angle recorded as a single value)¶

In [8]:
# Keep only rows whose seat angle is a single value, i.e. the entry contains no
# range/list separator ('-', ',' or a space). Take an explicit copy so the
# column assignment below acts on an independent frame, not on a slice of
# df_ratings_all.
temp = df_ratings_all[~df_ratings_all['Seat_angle'].str.contains('[- ,]')].copy()
temp.Seat_angle.info()
temp['Seat_angle'] = temp['Seat_angle'].astype(float)
plt.hist(temp.Seat_angle);
fig = px.box(temp, y='Seat_angle', notched=True)
# NOTE(review): a `color='Weight_status'` argument was previously commented out
# here — presumably an optional grouping; confirm the column exists before using it.
fig.update_traces(
    marker_line_color="black",  # colour of the lines bounding the box
    boxmean="sd",               # draw the mean together with the standard deviation
    selector=dict(type='box')
)

# Derive the subject count from the data instead of hard-coding it in the label.
fig.update_layout(width=300, height=500, yaxis_title=f'Seat angle for {len(temp)} subjects')
fig.show();
<class 'pandas.Series'>
Index: 174 entries, 0 to 207
Series name: Seat_angle
Non-Null Count  Dtype
--------------  -----
174 non-null    str  
dtypes: str(1)
memory usage: 2.7 KB
In [9]:
# Single-panel figure: discomfort slope vs. seat angle for the single-value subset.
fig, axes = plt.subplots(figsize=(6, 4))
scatter_ax(
    ax=axes,
    x=temp["Seat_angle"],
    y=temp["slope_hour"],
    xlabel="Seat angle",
    title="Discomfort increase per hour vs Seat_angle",
)
# Seat angle is not significant at the 5% significance level (p-value = 0.090).
OLS Regression Results
Dep. Variable: y R-squared: 0.017
Model: OLS Adj. R-squared: 0.011
Method: Least Squares F-statistic: 2.903
Date: Thu, 22 Jan 2026 Prob (F-statistic): 0.0902
Time: 13:40:22 Log-Likelihood: 26.322
No. Observations: 174 AIC: -48.64
Df Residuals: 172 BIC: -42.33
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 0.0533 0.074 0.721 0.472 -0.093 0.199
x1 0.0050 0.003 1.704 0.090 -0.001 0.011
Omnibus: 36.600 Durbin-Watson: 1.892
Prob(Omnibus): 0.000 Jarque-Bera (JB): 53.317
Skew: 1.183 Prob(JB): 2.64e-12
Kurtosis: 4.326 Cond. No. 117.


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

All seat angles - 208 participants (where a seat angle was recorded as a range, it is replaced by the mean of that participant's range during the study)¶

In [10]:
# Preview the first three seat-angle entries that were recorded as a range/list
# (they contain '-', ',' or a space) rather than as a single value.
df_ratings_all.loc[df_ratings_all['Seat_angle'].str.contains('[- ,]'), 'Seat_angle'].iloc[:3]
Out[10]:
67    15-36
72    22-32
83    17-21
Name: Seat_angle, dtype: str
In [11]:
def mean_from_cell(cell):
    """Collapse a seat-angle entry given as a range or list to its mean.

    Entries such as ``'15-36'``, ``'22,32'`` or ``'15 36'`` are split on
    ``-``, ``,`` or whitespace and the mean of the parts is returned. The
    separators match the ``'[- ,]'`` pattern used elsewhere in this notebook
    to identify range entries.

    Parameters
    ----------
    cell : str or number
        Raw seat-angle entry.

    Returns
    -------
    numpy.float64 or the original ``cell``
        The mean of the parsed numbers when ``cell`` holds several values.
        A string without separators, or a non-string value (already numeric
        or missing), is returned unchanged so the function can safely be
        applied to a column that has already been converted.

    Raises
    ------
    ValueError
        If a part of a multi-value entry is not a valid number
        (e.g. an empty part as in ``'-5'`` or ``'15,'``).
    """
    # Already numeric / missing: nothing to parse.
    if not isinstance(cell, str):
        return cell

    text = cell.strip()
    # A lone value has no separator; leave it for the caller's float cast.
    if not re.search(r'[-,\s]', text):
        return cell

    # Parse as float (not int) so decimal angles such as '17.5-20.5' work.
    numbers = [float(part) for part in re.split(r'\s*[-,\s]\s*', text)]
    return np.mean(numbers)
# Collapse range/list entries to their mean, then cast the whole column to float.
df_ratings_all['Seat_angle'] = (
    df_ratings_all['Seat_angle']
    .apply(mean_from_cell)
    .astype(float)
)
In [12]:
# Distribution of seat angle over all rows of df_ratings_all; labelled so the
# figure can be read on its own.
plt.hist(df_ratings_all.Seat_angle)
plt.xlabel('Seat angle')
plt.ylabel('Number of subjects')
plt.title('Seat angle distribution');
In [13]:
# Notched box plot of seat angle for every row of df_ratings_all.
fig = px.box(df_ratings_all, y='Seat_angle', notched=True)
# NOTE(review): a `color='Weight_status'` argument was previously commented out
# here — presumably an optional grouping; confirm the column exists before using it.
fig.update_traces(
    marker_line_color="black",  # colour of the lines bounding the box
    boxmean="sd",               # draw the mean together with the standard deviation
    selector=dict(type='box')
)

# Derive the subject count from the data instead of hard-coding it in the label.
fig.update_layout(width=300, height=500, yaxis_title=f'Seat angle for {len(df_ratings_all)} subjects')
fig.show();
In [14]:
# Single-panel figure: discomfort slope vs. seat angle for all rows of df_ratings_all.
fig, axes = plt.subplots(figsize=(6, 4))
scatter_ax(
    ax=axes,
    x=df_ratings_all["Seat_angle"],
    y=df_ratings_all["slope_hour"],
    xlabel="Seat angle",
    title="Discomfort increase per hour vs Seat_angle",
)
# Seat angle is significant at the 5% level but not at the 1% level (p-value = 0.027).
OLS Regression Results
Dep. Variable: y R-squared: 0.023
Model: OLS Adj. R-squared: 0.019
Method: Least Squares F-statistic: 4.944
Date: Thu, 22 Jan 2026 Prob (F-statistic): 0.0273
Time: 13:40:24 Log-Likelihood: 34.915
No. Observations: 208 AIC: -65.83
Df Residuals: 206 BIC: -59.15
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 0.0321 0.067 0.480 0.632 -0.100 0.164
x1 0.0058 0.003 2.223 0.027 0.001 0.011
Omnibus: 38.370 Durbin-Watson: 1.945
Prob(Omnibus): 0.000 Jarque-Bera (JB): 54.137
Skew: 1.113 Prob(JB): 1.76e-12
Kurtosis: 4.137 Cond. No. 120.


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [ ]:
 
In [ ]: